Bayesian Machine Learning
Probabilistic Programming and Bayesian Methods for Hackers by Cameron Davidson-Pilon
S. Lall at Stanford
Reza Shadmehr at Johns Hopkins University
Table of Contents
Given the height $x$ of a person, decide whether the person is male ($y=1$) or female ($y=0$).
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.stats import norm
from matplotlib import animation
% matplotlib inline
# Height grid (cm) shared by the male/female classification examples below.
x = np.arange(130, 200)
print(x)
# Bayes classifier for sex from height, with equal priors P(y=0)=P(y=1)=1/2.
x = np.arange(130, 200)
mu0, sigma0 = 160, 6.5    # class y=0 (female): mean, std in cm
mu1, sigma1 = 173, 6.5    # class y=1 (male): mean, std in cm

L1 = norm.pdf(x, mu0, sigma0)          # likelihood P(x|y=0)
L2 = norm.pdf(x, mu1, sigma1)          # likelihood P(x|y=1)
prior0 = 1 / 2
prior1 = 1 / 2
Px = L1 * prior0 + L2 * prior1         # evidence P(x), law of total probability

posterior0 = (L1 * prior0) / Px        # P(y=0|x) by Bayes' rule
posterior1 = (L2 * prior1) / Px        # P(y=1|x)
var1 = posterior1 - posterior1 ** 2    # Bernoulli variance p(1-p) of y given x

fig, (ax_top, ax_mid, ax_bot) = plt.subplots(3, 1, figsize=(10, 9))

# Row 1: class-conditional densities and the evidence.
ax_top.plot(x, L1, 'r', x, L2, 'b', x, Px, 'k')
ax_top.axis([130, 200, 0, 0.1])
ax_top.text(mu0 - 10, 0.08, 'P(x|y=0)', color='r', fontsize=15)
ax_top.text(mu1, 0.08, 'P(x|y=1)', color='b', fontsize=15)
ax_top.text(185, 0.03, 'P(x)', fontsize=15)

# Row 2: joint terms P(x|y)P(y).
ax_mid.plot(x, L1 * prior0, 'r', x, L2 * prior1, 'b')
ax_mid.axis([130, 200, 0, 0.1])
ax_mid.text(mu0 - 10, 0.05, 'P(x|y=0)P(y=0)', color='r', fontsize=15)
ax_mid.text(mu1, 0.05, 'P(x|y=1)P(y=1)', color='b', fontsize=15)

# Row 3: posteriors and the posterior variance of y.
ax_bot.plot(x, posterior0, 'r', x, posterior1, 'b', x, var1, 'k')
ax_bot.axis([130, 200, 0, 1.1])
ax_bot.text(140, 0.8, 'P(y=0|x)', color='r', fontsize=15)
ax_bot.text(180, 0.8, 'P(y=1|x)', color='b', fontsize=15)
ax_bot.text(170, 0.4, 'var(y|x)', fontsize=15)
plt.show()
# Same classifier, but with unequal priors P(y=0)=1/4, P(y=1)=3/4:
# the decision boundary shifts toward the less probable class.
x = np.arange(130, 200)
mu0, sigma0 = 160, 6.5
mu1, sigma1 = 173, 6.5

L1 = norm.pdf(x, mu0, sigma0)
L2 = norm.pdf(x, mu1, sigma1)
prior0 = 1 / 4
prior1 = 3 / 4

joint0 = L1 * prior0                   # P(x|y=0)P(y=0)
joint1 = L2 * prior1                   # P(x|y=1)P(y=1)
Px = joint0 + joint1                   # evidence P(x)
posterior0 = joint0 / Px
posterior1 = joint1 / Px
var1 = posterior1 - posterior1 ** 2    # var(y|x) for the Bernoulli posterior

plt.figure(figsize=(10, 9))

plt.subplot(3, 1, 1)
plt.plot(x, L1, 'r', x, L2, 'b', x, Px, 'k')
plt.axis([130, 200, 0, 0.1])
plt.text(mu0 - 10, 0.08, 'P(x|y=0)', color='r', fontsize=15)
plt.text(mu1, 0.08, 'P(x|y=1)', color='b', fontsize=15)
plt.text(185, 0.03, 'P(x)', fontsize=15)

plt.subplot(3, 1, 2)
plt.plot(x, joint0, 'r', x, joint1, 'b')
plt.axis([130, 200, 0, 0.1])
plt.text(mu0 - 10, 0.05, 'P(x|y=0)P(y=0)', color='r', fontsize=15)
plt.text(mu1, 0.05, 'P(x|y=1)P(y=1)', color='b', fontsize=15)

plt.subplot(3, 1, 3)
plt.plot(x, posterior0, 'r', x, posterior1, 'b', x, var1, 'k')
plt.axis([130, 200, 0, 1.1])
plt.text(140, 0.8, 'P(y=0|x)', color='r', fontsize=15)
plt.text(180, 0.8, 'P(y=1|x)', color='b', fontsize=15)
plt.text(170, 0.4, 'var(y|x)', fontsize=15)
plt.show()
# Same classifier with unequal spreads (sigma0=6.5 vs sigma1=15):
# the decision regions are no longer a single threshold.
x = np.arange(130, 200)
mu0, sigma0 = 160, 6.5
mu1, sigma1 = 173, 15

L1 = norm.pdf(x, mu0, sigma0)          # P(x|y=0)
L2 = norm.pdf(x, mu1, sigma1)          # P(x|y=1)
prior0 = 1 / 2
prior1 = 1 / 2
Px = L1 * prior0 + L2 * prior1         # evidence P(x)

posterior0 = (L1 * prior0) / Px        # P(y=0|x)
posterior1 = (L2 * prior1) / Px        # P(y=1|x)
var1 = posterior1 - posterior1 ** 2    # var(y|x) = p(1-p)

fig, (ax_top, ax_mid, ax_bot) = plt.subplots(3, 1, figsize=(10, 9))

# Row 1: class-conditional densities and the evidence.
ax_top.plot(x, L1, 'r', x, L2, 'b', x, Px, 'k')
ax_top.axis([130, 200, 0, 0.045])
ax_top.text(mu0 - 20, 0.03, 'P(x|y=0)', color='r', fontsize=15)
ax_top.text(mu1, 0.03, 'P(x|y=1)', color='b', fontsize=15)
ax_top.text(158, 0.027, 'P(x)', fontsize=15)

# Row 2: joint terms P(x|y)P(y).
ax_mid.plot(x, L1 * prior0, 'r', x, L2 * prior1, 'b')
ax_mid.axis([130, 200, 0, 0.045])
ax_mid.text(mu0 - 28, 0.015, 'P(x|y=0)P(y=0)', color='r', fontsize=15)
ax_mid.text(mu1 + 5, 0.015, 'P(x|y=1)P(y=1)', color='b', fontsize=15)

# Row 3: posteriors and the posterior variance of y.
ax_bot.plot(x, posterior0, 'r', x, posterior1, 'b', x, var1, 'k')
ax_bot.axis([130, 200, 0, 1.1])
ax_bot.text(143, 0.9, 'P(y=0|x)', color='r', fontsize=15)
ax_bot.text(180, 0.8, 'P(y=1|x)', color='b', fontsize=15)
ax_bot.text(173, 0.35, 'var(y|x)', fontsize=15)
plt.show()
Examples of Gaussian decision regions
Lecture 23 in Learning Theory (Reza Shadmehr, Johns Hopkins University)
%%html
<center><iframe src="https://www.youtube.com/embed/3r5SlvjJptM?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/X1WB6IJqMjM?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
Start with prior beliefs, which can be thought of as a summary of our current opinions.
Given new evidence, we update the prior via Bayes' rule to produce a posterior — a revised opinion.
Iterate: the posterior becomes the prior for the next observation.
%%html
<center><iframe src="https://player.vimeo.com/video/72116712"
width="640" height="360" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/J5uQcuQ_fJ0?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/w1u6-_2jQJo?rel=0"
width="420" height="315" frameborder="0" allowfullscreen></iframe></center>
Given model
# Generative model: the sensed position D of an object truly located at H
# is normally distributed around H with spread sigma.
H = 50        # hypothesized location of the unknown object
sigma = 20    # sensor noise standard deviation
n = 100       # number of discrete grid positions
# Use n for the grid size instead of repeating the literal 100
# (keeps this cell consistent with the animation cell, which uses n).
x = np.arange(0, n)
likelihood = norm.pdf(x, H, sigma)
likelihood = likelihood / np.sum(likelihood)  # normalize to a discrete pmf

plt.figure(figsize=(8, 6))
plt.plot(x, likelihood), plt.axis('tight'), plt.ylim([0, 0.025])
plt.xlabel('D', fontsize=15)
plt.ylabel('P(D|H)', fontsize=15)
plt.title('H = {}'.format(H), fontsize=20)
plt.show()
Likelihood
# Full likelihood table: column h holds the normalized discrete pmf P(D|H=h).
L = np.zeros([n, n])
for h in range(n):
    col = norm.pdf(x, h, sigma)
    L[:, h] = col / np.sum(col)

plt.figure(figsize=(8, 6))
plt.imshow(L, extent=[0, L.shape[0], L.shape[1], 0]), plt.colorbar()
plt.xlabel('H', fontsize=15), plt.ylabel('D', fontsize=15), plt.title('Pr(D|H)', fontsize=20)
plt.show()
# 3-D view of the likelihood surface P(D|H).
fig = plt.figure(figsize=(8, 6))
# fig.gca(projection='3d') was deprecated in Matplotlib 3.4 and removed in
# 3.6; add_subplot(projection='3d') is the supported way to get 3-D axes.
ax = fig.add_subplot(projection='3d')
X, Y = np.meshgrid(x, x)
surf = ax.plot_surface(X, Y, L, cmap='viridis', antialiased=False)
ax.view_init(28, -30)   # elevation, azimuth of the initial camera
plt.show()
Suppose $H = 45$
## run it to see animation
% matplotlib qt
# Sequential Bayesian localization: each noisy observation d of the true
# location H updates the posterior over the n grid positions; the animation
# shows the belief sharpening around H as observations accumulate.
n = 100
x = np.arange(0, n)
sigma = 20

# Likelihood table: column h is the discrete pmf P(D|H=h).
L = np.zeros([n, n])
for h in range(n):
    L[:, h] = norm.pdf(x, h, sigma)
    L[:, h] = L[:, h] / np.sum(L[:, h])

fig = plt.figure(figsize=(8, 6))
ax1 = fig.add_subplot(1, 1, 1)
graph1, = ax1.plot([], [], 'b')   # current belief (prior/posterior curve)
graph2, = ax1.plot([], [], 'r+')  # observations collected so far
ax1.set_xlim(0, 100)
ax1.set_ylim(-0.01, 0.15)

H = 45                  # true location (unknown to the observer)
pr = np.ones(n) / n     # initial prior: uniform over the grid
dh = []                 # history of observations


def init():
    """Reset both artists; required by blitted FuncAnimation."""
    graph1.set_data([], [])
    graph2.set_data([], [])
    return graph1, graph2,


def animate(i):
    """Draw one noisy observation, plot the current belief, apply Bayes' rule."""
    global pr, po
    if i > 50:
        anim.event_source.stop()
    d = int(np.round(np.random.normal(H, sigma)))  # noisy observation of H
    # Clamp to valid grid indices 0..n-1.  The original clamped to 100,
    # but L has shape (n, n) so L[100, :] raised IndexError whenever the
    # Gaussian tail produced d >= 100.
    if d < 0:
        d = 0
    elif d > n - 1:
        d = n - 1
    dh.append(d)
    graph1.set_data(x, pr)
    graph2.set_data(dh, np.zeros(np.size(dh)))
    po = L[d, :] * pr        # posterior ∝ likelihood × prior
    po = po / np.sum(po)
    pr = po                  # today's posterior is tomorrow's prior
    return graph1, graph2,


anim = animation.FuncAnimation(fig, animate, init_func=init, interval=100, blit=True)
plt.show()
print(np.mean(dh))
print('I = {}'.format(np.argmax(po)))
% matplotlib inline
# Sanity check: the collected observations should look roughly normal
# around the true location H.
plt.figure(figsize=(8, 6))
plt.hist(dh, 21), plt.xlim([0, 100])
plt.title('Histogram of D', fontsize=20)
plt.show()
%%html
<center><iframe src="https://www.youtube.com/embed/qsLF3KgavJk?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/dhsK-mRzxFw?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
iSystems Demo
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')